  # ------------------------------------- #
# Replication code for:
#
# Rathbun, Brian C. and Caleb Pomeroy "See No Evil, Speak No Evil? Morality, Evolutionary Psychology, 
# and the Nature of International Relations," International Organization.
#
# This script reproduces the Russian build-a-threat results.
# ------------------------------------- #


# --- set your working directory --- #
setwd("~/Downloads/seeing_evil_replication/")

# --- Libraries --- #
library(ggplot2)
library(readr)
library(quanteda)
library(igraph)
library(dplyr)

# the data consist of a 1245 subject x 9 traits dataframe, with individual vs. country condition assignment in the final column.
# the cells are populated with the rank importance assigned to that trait by the subject (w/ "1" indicating most important and up to "9" indicating least important)
person_country_attributes <- read_csv("data/russia_data.csv") 


# --- Ranks by condition
att_df <- reshape2::melt(person_country_attributes)
att_df$value <- factor(att_df$value)
att_df <- na.omit(att_df) # NAs here simply indicate that a given subject did not include that trait in their composite threat image. they don't indicate missing data.

levels(att_df$variable) <-
  list("Honest/Trustworthy" = "Honest", 
       "Fair/Just" = "Fair",
       "Intelligent" = "Intelligent",
       "Friendly/Considerate" = "Friendly", 
       "Organized/Competent" = "Organized",
       "Powerful/Strong" = "Powerful", 
       "Resolved/Determined" = "Resolved",
       "Culturally Similar" = "Culture", 
       "Generous/Compassionate" = "Generous")

att_df$actor <- factor(att_df$actor, levels = c("Individuals", "Countries"))
att_df$value <- as.numeric(as.character(att_df$value))

# overall mean ranks for plot order
aggregate(value ~ variable, att_df, mean)

# mean ranks for subjects in the individual (i.e. interpersonal) condition
individual_ranks <- aggregate(value ~ variable, subset(att_df, actor == "Individuals"), mean)
individual_ranks[order(individual_ranks$value),]

# mean ranks for subjects in the country (i.e. interstate) condition
country_ranks <- aggregate(value ~ variable, subset(att_df, actor == "Countries"), mean)
country_ranks[order(country_ranks$value),]


# --- KS test for differences in distributions of ranks per condition
ks_df <- data.frame()
for(i in 1:length(unique(att_df$variable))){
  testing <- subset(att_df, variable == levels(att_df$variable)[i])
  testing$value <- as.numeric(as.character(testing$value))
  results_i <- ks.test(subset(testing, actor == "Countries")$value, 
                       subset(testing, actor == "Individuals")$value)
  ks_df <- rbind(ks_df, 
                 cbind(D = results_i$statistic, p = results_i$p.value, variable = levels(att_df$variable)[i]))
  
}
# the warnings are due to the presence of ties, since the distributions are discrete. see below for alternative tests, which produce the same substantive conclusions.
ks_df$D <- round(as.numeric(as.character(ks_df$D)), 2)
ks_df$p <- as.numeric(as.character(ks_df$p))

# stars according to IO's standards for stat significance
ks_df$stars <- ifelse(ks_df$p < .01, "***", 
                             ifelse(ks_df$p < .05, "**",
                                    ifelse(ks_df$p < .10, "*", "")))

ks_df$anno <- paste("D =", ks_df$D, ks_df$stars)
ks_df <- rbind(cbind(ks_df, actor = "Countries"),
               cbind(ks_df, actor = "Individuals"))

# --- Figure 2(A): trait rank distributions by condition --- #
ggplot(att_df, aes(fill = actor))+
  theme_bw() +
  geom_bar(aes(x = value), stat = "count", position = position_dodge(width=.6), color = "black", width = .8) +
  facet_wrap(.~variable, ncol = 1) +
  labs(x = "Rank", y = "Count") +
  geom_label(x = 7.8, y = 160, aes(label = anno), data = ks_df, fill = "white") +
  scale_fill_manual(values = c("Countries" = "#99acbc", "Individuals" = "#696969")) +
  scale_x_discrete(labels = c("Most Important", "", "", "", "", "", "", "", "Least Important")) +
  theme(title = element_text(size = 18),
        strip.text = element_text(size = 13),
        legend.title = element_blank(),
        legend.text = element_text(size = 18),
        legend.position = "top",
        axis.text.x = element_text(size = 15),
        axis.text.y = element_text(size = 13),
        axis.title.x = element_text(size = 22, margin = margin(t = 14, b = 0, l = 0)),
        axis.title.y = element_text(size = 22, margin = margin(t = 0, r = 14, b = 0, l = 0)),
        plot.margin = margin(r=60, l = 20))
#ggsave("threat_traits_ranks.pdf", width = 4.3, height = 14) 


# --- confirm KS test conclusions using chi-square
# KS tests are nice, b/c they're quite sensitive. but, our distributions are 
# discrete, which likely reduces the power of the test. so, here we confirm substantive conclusions
# using a more standard chi-square test (as well as a fisher test, since some of the table cells contain very small numbers)

chi_df_dis <- data.frame()
for(i in 1:length(unique(att_df$variable))){
  
  testing <- subset(att_df, variable == levels(att_df$variable)[i])
  testing$value <- as.numeric(as.character(testing$value))
  x <- as.vector(table(subset(testing, actor == "Countries")$value))
  y <- as.vector(table(subset(testing, actor == "Individuals")$value))
  n <- max(length(x), length(y))
  length(x) <- n; length(y) <- n
  
  chi_df <-
    data.frame(countries = x,
               individuals = y,
               row.names = 1:n)
  chi_df[is.na(chi_df)] <- 0
  
  chi_results <- chisq.test(chi_df, simulate.p.value = TRUE, B = 2000)
  fisher_results <- fisher.test(chi_df, simulate.p.value = TRUE)
  
  chi_df_dis <- rbind(chi_df_dis, 
                      cbind(p_chi = chi_results$p.value,
                            p_fisher = fisher_results$p.value,
                            variable = levels(att_df$variable)[i]),
                      stringsAsFactors = FALSE)
}
chi_df_dis$p_chi <- as.numeric(chi_df_dis$p_chi)
chi_df_dis$p_fisher <- as.numeric(chi_df_dis$p_fisher)

# is there evidence (i.e. p < .05) that distributions of trait ranks are different for 
# subjects in the individual vs. country conditions?
subset(chi_df_dis, p_chi < .05 | p_fisher < .05)
# ...yes, for fair/just, intelligent, organized/competent, powerful/strong
# importantly, these are the same differences detected by the KS test above, so same substantive conclusions



# --- Figure 2(B): individual trait co-occurence network --- #
att_plot_temp <- subset(person_country_attributes, actor == "Individuals")

att_plot_person <- c()
for(i in 1:nrow(att_plot_temp)){
  att_plot_person[i] <- paste(words <- colnames(att_plot_temp[i,][colSums(!is.na(att_plot_temp[i,])) > 0]), collapse = " ")
  
}

att_plot_person <- att_plot_person[att_plot_person != ""]
att_text_person_fcm <- fcm(att_plot_person, context = "document", count = "frequency")
colorder <- c("Honest", "Intelligent", "Fair", "Friendly", "Resolved", "Organized", "Powerful", "Culture", "Generous")
plot_mat <- as.matrix(att_text_person_fcm) + t(as.matrix(att_text_person_fcm))
plot_mat <- plot_mat[colorder,colorder]
plot_net <- graph_from_adjacency_matrix(plot_mat, mode="undirected", weighted = T)
coords <- layout_in_circle(plot_net, order = rev(c("Organized", "Powerful", "Culture", "Generous", "Honest", "Intelligent", "Fair", "Friendly", "Resolved")))
E(plot_net)$color <- as.character(cut(E(plot_net)$weight, 4, labels = c("gray90", "gray60", "gray40", "gray10")))
E(plot_net)$size <- as.numeric(as.character(cut(E(plot_net)$weight, 5, labels = c(2,4,6,8,10))))

plot(plot_net, layout = coords, 
     vertex.color = "gray10",
     vertex.size = 4.4,
     vertex.label.family = "sans",
     vertex.label.color = "black",
     vertex.label.cex = 1.8,
     vertex.label.dist = 3,
     vertex.label.degree = c(pi,3.6,12,12,pi/6,pi/5,pi/5,2.6,-3.8),
     edge.color = E(plot_net)$color,
     edge.width = E(plot_net)$size,
     edge.curved = .1)


# --- Figure 2(C): country trait co-occurence network --- #
att_plot_temp <- subset(person_country_attributes, actor == "Countries")

att_plot_country <- c()
for(i in 1:nrow(att_plot_temp)){
  att_plot_country[i] <- paste(words <- colnames(att_plot_temp[i,][colSums(!is.na(att_plot_temp[i,])) > 0]), collapse = " ")
  
}

att_plot_country <- att_plot_country[att_plot_country != ""]
att_text_country_fcm <- fcm(att_plot_country, context = "document", count = "frequency")
plot_mat <- as.matrix(att_text_country_fcm) + t(as.matrix(att_text_country_fcm))
plot_mat <- plot_mat[colorder,colorder]
plot_net <- graph_from_adjacency_matrix(plot_mat, mode="undirected", weighted = T)
E(plot_net)$color <- as.character(cut(E(plot_net)$weight, 4, labels = c("gray90","#e0e6eb", "#c8d3dc","gray10")))
E(plot_net)$size <- as.numeric(as.character(cut(E(plot_net)$weight, 5, labels = c(2,4,6,8, 10))))

plot(plot_net, layout = coords, 
     vertex.color = "gray10",
     vertex.size = 4.4,
     vertex.label.family = "sans",
     vertex.label.color = "black",
     vertex.label.cex = 1.8,
     vertex.label.dist = 3,
     vertex.label.degree = c(pi,3.6,12,12,pi/6,pi/5,pi/5,2.6,-3.8),
     edge.color = E(plot_net)$color,
     edge.width = E(plot_net)$size,
     edge.curved = .1)

